/* ****************************************************************************
 * Copyright: 2017-2025 RAYLASE GmbH
 * This source code is the proprietary confidential property of RAYLASE GmbH.
 * Reproduction, publication, or any form of distribution to
 * any party other than the licensee is strictly prohibited.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <exception>
#include <thread>
#include <vector>

/// Calls `lambda(index)` once for every index in [0, count), distributing the calls over threads.
///
/// Expected callable signature: `void lambda(size_t index)`.
///
/// Up to min(max(std::thread::hardware_concurrency(), 1), count) worker threads are started.
/// Indices are handed out one at a time from a shared atomic counter, highest index first, so
/// `lambda` is invoked concurrently from several threads and must be safe to call that way.
/// The function returns only after every started worker has been joined.
///
/// If a worker thread cannot be started, parallelism is reduced instead of aborting: the
/// remaining indices are processed by the workers that did start and by the calling thread.
///
/// If `lambda` throws, no further indices are handed out, all workers are joined, and the first
/// captured exception is rethrown to the caller. Indices not yet claimed at that point are skipped.
template<typename LambdaT> static inline void ParallelFor(size_t count, LambdaT&& lambda)
{
	if (count == 0)
		return;

	const size_t threadCount = std::min<size_t>(std::max<size_t>(std::thread::hardware_concurrency(), 1), count);

	// Tickets count upwards; ticket t maps to index (count - 1 - t). Each participant overshoots
	// by at most one ticket, so this only assumes count + threadCount + 1 does not overflow size_t.
	std::atomic<size_t> nextTicket{0};
	std::atomic<bool> failed{false};
	std::exception_ptr firstError; // Written by at most one thread, read after all joins.

	auto worker = [&nextTicket, &failed, &firstError, &lambda, count]() {
		try
		{
			while (!failed.load(std::memory_order_relaxed))
			{
				const size_t ticket = nextTicket.fetch_add(1, std::memory_order_relaxed);
				if (ticket >= count)
					break; // All indices have been handed out.
				lambda(count - 1 - ticket);
			}
		}
		catch (...)
		{
			// Only the first failing participant stores its exception; join() publishes it to the caller.
			if (!failed.exchange(true, std::memory_order_relaxed))
				firstError = std::current_exception();
		}
	};

	std::vector<std::thread> threads;
	threads.reserve(threadCount);
	try
	{
		for (size_t i = 0; i < threadCount; ++i)
			threads.emplace_back(worker);
	}
	catch (...)
	{
		// Could not start (another) worker. Already running workers must still be joined,
		// so do not propagate; the calling thread helps out below instead.
	}

	// Fewer workers than planned (possibly none): drain the remaining indices on this thread.
	if (threads.size() < threadCount)
		worker();

	// Wait for finish...
	for (std::thread& thread : threads)
		thread.join();

	if (firstError)
		std::rethrow_exception(firstError);
}